####################################################################
# Lipps, Jana & Dominik Schraff. Estimating subnational preferences 
# across the European Union. Political Science Research and Methods.
####################################################################

#######################
# Online Appendix #####
# Additional Analyses #
#######################

# Session info
# R version 3.5.1 (2018-07-02)
# Platform: x86_64-w64-mingw32/x64 (64-bit)
# Running under: Windows >= 8 x64 (build 9200)

#Packages
#Version: ggplot2_3.1.0  
library(ggplot2)
#Version: lme4_1.1-17
library(lme4)
#Version: survey_3.33-2
library(survey)
#Version: glmnet_2.0-16
library(glmnet)
#Version: gridExtra_2.3
library(gridExtra)
#Version: stargazer_5.2.2
library(stargazer)
#Version: plyr_1.8.4
library(plyr)
#Version: arm_1.10-1 
library(arm)
#Version: sjstats_0.17.3
library(sjstats)
#Version: dplyr_0.7.6
library(dplyr)
#Version: BayesTree_0.3-1.4
library(BayesTree)
#Version: xtable_1.8-3
library(xtable)

#set working directory to folder containig replication files
#setwd("...")

#Load Data
load("realm_prep_2703.RData") #Reference values (means) from Flash EB 2015
load("realse_prep_2703.RData") #Reference values (std. errors) from Flash EB 2015
load("std_eb833_prep_2703.RData") #Eurobarometer 83.3 (2015) with regional predictors
load("eb77.3_flash12_disag.RData") #weighted means from Flash EB 2012 and EB 77.3 (2012)
load("eb83.3_flash15_disag.RData") #weighted means from Flash EB 2015 and EB 83.3 (2015)
load("ees14_flash15_disag.RData") #weighted means from Flash EB 2015 and EES 2014
load("eb79.5_qog13_disag.RData") #weighted means from Quality of Governance (2013) and EB 79.5 (2013)
load("subst_interpret.RData") # estimation results of the main analysis with regional predictors

#Useful functions

#RMSE (extra weight to large errors)
rmse <- function(error)
{
  sqrt(mean(error^2))
}

#MAE (equal weight to all errors)
mae <- function(error)
{
  mean(abs(error))
}

#############
#APPENDIX

#Table A2: Three-level analysis of variance in Std. EB 83.3
m0 <- glmer(eu_trust~(1|nuts)+(1|isocntry), family=binomial(link="logit"), data=std_eb)
summary(m0)
icc(m0)
#NUTS Var=0.1548, Country Var=0.224
#NUTS ICC=0.0422, Country ICC=0.0611
stargazer(m0, header=F, dep.var.caption = "Dependent variable" ,dep.var.labels = "EU trust", 
          add.lines = list(c("NUTS Var", "0.1548"), c("Country Var", "0.224"),
                             c("NUTS ICC", "0.0422"), c("Country ICC", "0.0611")),
          style = "ajps",
          font.size = "footnotesize",
          type = "latex",
          out = "Table_A2.tex")

#Table A4: Performance of Disaggregation

tablea4 <- data.frame(matrix(ncol=12, nrow=4))

#Compare Flash EB 12 values to EB 77.3 (2012)
tablea4[1,1] <- cor(disag2012$flash12mean, disag2012$eb773mean)
# cor = 0.4974249
disag2012$covrg <- 0
disag2012$covrg[(disag2012$eb773mean<disag2012$flash12.ci.u) & (disag2012$eb773mean>disag2012$flash12.ci.l)] <- 1
tablea4[1,2] <- mean(disag2012$covrg)
# cov = 0.222
tablea4[1,3] <- mae(disag2012$flash12mean - disag2012$eb773mean)
# mae = 0.1424146
tablea4[1,4] <- rmse(disag2012$flash12mean - disag2012$eb773mean)
# rmse = 0.1796655

#Compare Flash EB 15 values to Eb 83.3 (2015)
tablea4[2,5] <- cor(disag2015$flash15mean, disag2015$eb833mean)
# cor = 0.4280511
disag2015$covrg <- 0
disag2015$covrg[(disag2015$eb833mean<disag2015$flash15.ci.u) & (disag2015$eb833mean>disag2015$flash15.ci.l)] <- 1
tablea4[2,6] <- mean(disag2015$covrg)
# cov = 0.3316
tablea4[2,7] <- mae(disag2015$flash15mean - disag2015$eb833mean)
# mae = 0.1203551
tablea4[2,8] <- rmse(disag2015$flash15mean - disag2015$eb833mean)
# rmse = 0.1486539

#Compare Flash EB 15 values to EES 2014
tablea4[3,5] <- cor(disagees$flash15mean, disagees$eesmean)
# cor = 0.4030929
disagees$covrg <- 0
disagees$covrg[(disagees$eesmean<disagees$flash15.ci.u) & (disagees$eesmean>disagees$flash15.ci.l)] <- 1
tablea4[3,6] <- mean(disagees$covrg)
# cov = 0.3636
tablea4[3,7] <- mae(disagees$flash15mean - disagees$eesmean)
# mae = 0.1054077
tablea4[3,8] <- rmse(disagees$flash15mean - disagees$eesmean)
# rmse = 0.1361286

#Compare Quality of Governance (2013) to EB 79.5 (2013)
tablea4[4,9] <- cor(disagqog$qogmean, disagqog$eb795mean)
# cor = 0.327379
disagqog$covrg <- 0
disagqog$covrg[(disagqog$eb795mean<disagqog$qog.ci.u) & (disagqog$eb795mean>disagqog$qog.ci.l)] <- 1
tablea4[4,10] <- mean(disagqog$covrg)
# cov = 0.3509
tablea4[4,11] <- mae(disagqog$qogmean - disagqog$eb795mean)
# mae = 0.08399876
tablea4[4,12] <- rmse(disagqog$qogmean - disagqog$eb795mean)
# rmse = 0.1093948

tablea4 <- tablea4 %>% mutate_if(is.numeric, round, digits = 3)
rownames(tablea4) <- c("EB 77.3", "EB 83.3", "EES 2014", "EB 79.5")
colnames(tablea4) <- rep(c("cor", "cov", "MAE", "RMSE"),3)

addtorow <- list()
addtorow$pos <- list(0,0)
addtorow$command <- c("& \\multicolumn{4}{c|}{Flash EB 2012: EU Trust} & \\multicolumn{4}{c|}{Flash EB 2015: EU Trust} & \\multicolumn{4}{c|}{QoG 2013: Left Share} \\\\",
                      "& cor & cov & MAE & RMSE & cor & cov & MAE & RMSE & cor & cov & MAE & RMSE \\\\")
print(xtable(tablea4), add.to.row = addtorow, include.colnames = FALSE,
      file="Table_A4.tex")

#Table A5: The consequences of noise for substantive interpretations

a0 <- lmer(steb~(1|isocntry)+scale(log(convg_pc+1))+stdgovtrust.n2, data=interpret)
summary(a0)
b0 <- lmer(post.sj~(1|isocntry)+scale(log(convg_pc+1))+stdgovtrust.n2, data=interpret) 
summary(b0)
d0 <- lmer(bart.p.sj~(1|isocntry)+scale(log(convg_pc+1))+stdgovtrust.n2, data=interpret)
summary(d0)
c0 <- lmer(real~(1|isocntry)+scale(log(convg_pc+1))+stdgovtrust.n2, data=interpret) 
summary(c0)

stargazer(a0, b0, d0, c0, dep.var.labels = c("EB 83.3", "MrsP", "BART.p.sj", "Flash EB"),
          dep.var.caption = "Average Regional EU Trust",
          covariate.labels = c("Convergence Funding p.c. (log)", "Average Trust in Government"),
          model.numbers = T,
          style = "ajps",
          font.size = "footnotesize",
          type = "latex",
          out = "Table_A5.tex")

#Figure A2: MrsP with national parliament trust
load("std_eb833_prep_2703.RData") #Eurobarometer 83.3
load("realm_prep_2703.RData") #Reference values (means) from Flash EB
load("realse_prep_2703.RData") #Reference values (std. errors) from Flash EB
load("fullsynth.RData") #Contingency table for post-stratification

#Set contingency table for post-stratification
ps.tab <- fullsynth
ps.tab <- merge(ps.tab, unique(std_eb[,c("nuts", "stdgovtrust.n2", "stdgdp",
                                         "stdgovtrust.nat", "region")], by = "nuts"))

m1 <- glmer(eu_trust~(1|nuts)+(1|isocntry)+(1|nat.parl.trust)+(1|lr.cat)+(1|region)+
              (1|ager2) + (1|marital_r1) +
              stdgovtrust.n2+stdgdp+stdgovtrust.nat, 
            family=binomial(link="logit"), data=std_eb)
summary(m1)

ps.tab$m1_cellpred <- invlogit(fixef(m1)["(Intercept)"]
                               +ranef(m1)$nuts[as.character(ps.tab$nuts),1]
                               +ranef(m1)$region[as.character(ps.tab$region),1]
                               +ranef(m1)$isocntry[as.character(ps.tab$isocntry),1]
                               +ranef(m1)$lr.cat[as.character(ps.tab$lr.cat),1]
                               +ranef(m1)$nat.parl.trust[as.character(ps.tab$nat.parl.trust),1]
                               +ranef(m1)$ager2[as.character(ps.tab$ager2),1]
                               +ranef(m1)$marital_r1[as.character(ps.tab$marital_r1),1]
                               +fixef(m1)["stdgovtrust.n2"]*as.numeric(as.character(ps.tab$stdgovtrust.n2))
                               +fixef(m1)["stdgovtrust.nat"]*as.numeric(as.character(ps.tab$stdgovtrust.nat))
                               +fixef(m1)["stdgdp"]*as.numeric(as.character(ps.tab$stdgdp))
)

#Calculate the weighted prediction          
ps.tab$m1_wpred <- ps.tab$Share*ps.tab$m1_cellpred 

ps.tab$nuts <- as.character(ps.tab$nuts)

#Get the regional predictions
m1pred <- ddply(ps.tab, .(nuts), summarize, postm1=sum(m1_wpred, na.rm = T), 
                mlm1=mean(m1_cellpred, na.rm = T))

m1.final <- merge(realm, m1pred, by.x="row.names", by.y = "nuts")
head(m1.final)

m1.final$postm1[m1.final$postm1== 0] <- NA
m1.final <- na.omit(m1.final)
names(m1.final) <- c("nuts", "realm", "postm", "mlm")

#Pearson's rho
cor(m1.final$realm, m1.final$postm)

#Kenndall's tau
cor(m1.final$realm, m1.final$postm, method = "kendall")

#Errors
mae(m1.final$realm - m1.final$postm)
rmse(m1.final$realm - m1.final$postm)

#Plot
mrsp.plot <- ggplot(m1.final, aes(x=postm, y=realm)) +
  geom_point(size = 1) +
  geom_smooth(method = "lm") +
  geom_abline(intercept = 0, slope = 1) +
  xlim(0,1) + ylim(0,1) +
  xlab("Predicted EU Trust") +
  ylab("Real EU Trust") +
  ggtitle("MrsP") +
  annotate("text", x=.15, y=0.99, label="r == 0.60", size=3, parse=T) +
  annotate("text", x=.15, y=0.94, label="cov == 0.42", size=3, parse=T) +
  annotate("text", x=.15, y=0.89, label="MAE == 0.07", size=3, parse=T) +
  annotate("text", x=.15, y=0.84, label="RMSE == 0.09", size=3, parse=T)
print(mrsp.plot)

dev.copy2pdf(width = 8, height = 7, file = "Figure_A2.pdf")
dev.off()

# Figure A3: Social trust across Spanish regions
load("extsynth.RData")
load("contmari.RData")
load("regmeans.RData")
load("eb741_es.RData")

names(eb741.es)[names(eb741.es)=="age"] <- "ager2"

dat.train <- eb741.es[,c("nuts", "ager2", "marital_r1", "gender", "lr.cat", "soctrust_bin",
                         "gdp", "pop", "area", "edu", "inc")]

dat.train <- dat.train %>% mutate(stdgdp = scale(gdp),
                                  stdpopdr = scale(pop),
                                  stdarea = scale(area),
                                  stdedur = scale(edu),
                                  stdinc = scale(inc))

dat.train <- dat.train[,-c(7:11)]


dat.train <- na.omit(dat.train)
dat.train$nuts <- as.factor(dat.train$nuts)
#dat.train$isocntry <- as.factor(dat.train$isocntry)
#dat.train$region <- as.factor(dat.train$region)
dat.train$gender <- as.factor(dat.train$gender)
dat.train$ager2 <- as.factor(dat.train$ager2)
dat.train$marital_r1 <- as.factor(dat.train$marital_r1)
dat.train$soctrust_bin <- as.factor(dat.train$soctrust_bin)
dat.train$lr.cat <- as.factor(dat.train$lr.cat)

dat.train.x <- makeind(dat.train[,-6])

dat.test <- expand.grid(nuts=unique(as.character(dat.train$nuts)),
                        ager2 = unique(eb741.es$ager2),
                        marital_r1= unique(eb741.es$marital_r1)[1:4],
                        gender= c("Man", "Woman"),
                        lr.cat = c("Left", "Right", "Centre"))
dat.test <- merge(dat.test, unique(dat.train[,c(1,7:11)]), by = "nuts")
dat.test.x <- makeind(dat.test)

set.seed(99)
system.time(bart.synt.soc <- bart(x.train=dat.train.x,
                                  y.train=dat.train[,6],
                                  x.test=dat.test.x,
                                  binaryOffset=0,
                                  k=10))

#save(bart.cl.soc, file="BART_class_reg.RData")

plot(bart.synt.soc)
bart.synt.soc$yhat.test.p <- pnorm(bart.synt.soc$yhat.test)
bart.meds <- colMeans(bart.synt.soc$yhat.test)
bart.meds.p <- colMeans(bart.synt.soc$yhat.test.p)

dat.test$preds <-  bart.meds.p

final.bart.p <- merge(dat.test, extsynth[,c(1:9)], by=c("nuts", "gender", "ager2", "marital_r1", "lr.cat"))

#tot.share <- ddply(final.bart.p, .(nuts), summarise, totshare = sum(popshare))
#final.bart.p <- merge(final.bart.p, tot.share, by = "nuts")

#final.bart.p$popshare.adj <- final.bart.p$popshare/final.bart.p$totshare

bart.reg <- ddply(final.bart.p, .(nuts), summarize, bart.p=weighted.mean(preds, Share.adj),
                  bart=mean(preds))

results <- merge(regmeans, bart.reg, by="nuts")

#Pearson's rho
cor(results$regstrust, results[,c(3:5)])

#Kendall's tau
cor(results$regstrust, results[,c(3:5)], method = "kendall")

#Error
sapply(results$regstrust - results[,c(3:5)], mae) 

sapply(results$regstrust - results[,c(3:5)], rmse) 


disag <- ggplot(results, aes(x=ebstrust, y=regstrust, label = factor(nuts))) +
  geom_text(check_overlap = TRUE) +
  geom_smooth(method = "lm") +
  geom_abline(intercept = 0, slope = 1) +
  xlim(0.1,0.8) + ylim(0.25,0.6) +
  annotate("text", x=0.25, y=0.6, label= "Pearson's Rho = 0.67") +
  annotate("text", x=0.25, y=0.59, label= "Kendall's Tau = 0.28") +
  annotate("text", x=0.25, y=0.58, label= "RMSE = 0.16") +
  annotate("text", x=0.25, y=0.57, label= "MAE = 0.13") +
  ylab("Real Social Trust") +
  xlab("Predicted Social Trust") +
  ggtitle("Disaggregation") +
  theme(plot.title = element_text(hjust = 0.5))

bartp <- ggplot(results, aes(x=bart.p, y=regstrust, label = factor(nuts))) +
  geom_text(check_overlap = TRUE) +
  geom_smooth(method = "lm") +
  geom_abline(intercept = 0, slope = 1) +
  xlim(0.1,0.8) + ylim(0.25,0.6) +
  annotate("text", x=0.25, y=0.6, label= "Pearson's Rho = 0.81") +
  annotate("text", x=0.25, y=0.59, label= "Kendall's Tau = 0.63") +
  annotate("text", x=0.25, y=0.58, label= "RMSE = 0.09") +
  annotate("text", x=0.25, y=0.57, label= "MAE = 0.08") +
  ylab("Real Social Trust") +
  xlab("Predicted Social Trust") +
  ggtitle("Synthetic BART") +
  theme(plot.title = element_text(hjust = 0.5))

grid.arrange(disag, bartp, nrow = 1)
dev.copy2pdf(width = 8, height = 7, file = "Figure_A3.pdf")
dev.off()




